"%&%" = function(a,b) paste(a,b,sep="")
library(ggplot2)
library(dplyr)
library(tidyr)
my.dir <- '/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/BSLMM_exp/'

Compare 1M iterations (only 1421 genes finished in 100hrs) to 100K iterations (most genes finished in <100hrs, some still running)

mil<-read.table(my.dir %&% 'working_DGN-WB_exp_BSLMM_1M_iterations_genes_finished_in_100hrs_2015-06-16.txt',header=T)
hun<-read.table(my.dir %&% 'DGN-WB_exp_BSLMM-s100K_iterations_all_genes_2015-06-14.txt',header=T)
hun<-hun[complete.cases(hun),]
all<-inner_join(mil,hun,by='gene')
dim(all)
## [1] 1417   37
##plot diff in median PVE
ggplot(all,aes(x=pve50.x,y=pve50.y)) + geom_point() + xlab("median PVE (1M iterations)") + ylab("median PVE (100K iterations)") + geom_abline(c(0,1))

ggplot(all,aes(x=pve50.x,y=pve50.x-pve50.y)) + geom_point() + xlab("median PVE (1M iterations)") + ylab("med PVE (1M) - med PVE (100K)")

##plot diff in upper 95% credible interval PVE
ggplot(all,aes(x=pve975.x,y=pve975.y)) + geom_point() + xlab("upper CI PVE (1M iterations)") + ylab("upper CI PVE (100K iterations)") + geom_abline(c(0,1))

ggplot(all,aes(x=pve975.x,y=pve975.x-pve975.y)) + geom_point() + xlab("upper CI PVE (1M iterations)") + ylab("upper CI PVE (1M) - upper CI PVE (100K)")

##plot diff in lower 95% credible interval PVE
ggplot(all,aes(x=pve025.x,y=pve025.y)) + geom_point() + xlab("lower CI PVE (1M iterations)") + ylab("lower CI PVE (100K iterations)") + geom_abline(c(0,1))

ggplot(all,aes(x=pve025.x,y=pve025.x-pve025.y)) + geom_point() + xlab("lower CI PVE (1M iterations)") + ylab("lower CI PVE (1M) - lower CI PVE (100K)")

##plot diff in median PGE
ggplot(all,aes(x=pge50.x,y=pge50.y)) + geom_point() + xlab("median PGE (1M iterations)") + ylab("median PGE (100K iterations)") + geom_abline(c(0,1))

ggplot(all,aes(x=pge50.x,y=pge50.x-pge50.y)) + geom_point() + xlab("median PGE (1M iterations)") + ylab("med PGE (1M) - med PGE (100K)")

##plot diff in upper 95% credible interval PGE
ggplot(all,aes(x=pge975.x,y=pge975.y)) + geom_point() + xlab("upper CI PGE (1M iterations)") + ylab("upper CI PGE (100K iterations)") + geom_abline(c(0,1))

ggplot(all,aes(x=pge975.x,y=pge975.x-pge975.y)) + geom_point() + xlab("upper CI PGE (1M iterations)") + ylab("upper CI PGE (1M) - upper CI PGE (100K)")

##plot diff in lower 95% credible interval PGE
ggplot(all,aes(x=pge025.x,y=pge025.y)) + geom_point() + xlab("lower CI PGE (1M iterations)") + ylab("lower CI PGE (100K iterations)") + geom_abline(c(0,1))

ggplot(all,aes(x=pge025.x,y=pge025.x-pge025.y)) + geom_point() + xlab("lower CI PGE (1M iterations)") + ylab("lower CI PGE (1M) - lower CI PGE (100K)")

Plot PVE and PGE (100K iterations)

arrange by median PVE

data <- hun %>% arrange(pve50) %>% mutate(position=1:length(pve50),`pge025>0.01`=pge025>0.01) 
head(data,n=3L)
##      gene         h50       pve50     rho50     pge50       pi50 n_gamma50
## 1  SHCBP1 0.007489189 0.002255288 0.6748546 0.2452269 0.02470316         2
## 2  PEX11B 0.009904461 0.002262330 0.7816407 0.0995318 0.01386788         1
## 3 POLR3GL 0.012444085 0.002421142 0.7465017 0.1491072 0.01726753         1
##           h025       pve025     rho025 pge025       pi025 n_gamma025
## 1 0.0002431027 7.795688e-05 0.04658053      0 0.008353717          0
## 2 0.0002672425 6.494790e-05 0.06241255      0 0.007701220          0
## 3 0.0002618914 6.331537e-05 0.04661430      0 0.009215575          0
##        h975     pve975    rho975    pge975      pi975 n_gamma975 position
## 1 0.2583769 0.01672958 0.9960595 0.9583445 0.13095160         16        1
## 2 0.4109267 0.01385198 0.9977285 0.9449034 0.03397639          6        2
## 3 0.3093211 0.02252967 0.9968665 0.9399027 0.08992310          8        3
##   pge025>0.01
## 1       FALSE
## 2       FALSE
## 3       FALSE
tail(data,n=3L)
##           gene       h50     pve50     rho50     pge50       pi50
## 11495 TMEM176B 0.7503864 0.8450594 0.9979710 0.9987881 0.00514503
## 11496 HLA-DQA2 0.8727005 0.8454030 0.9981617 0.9976210 0.01233211
## 11497 HLA-DQB1 0.7047662 0.8590246 0.9921770 0.9976744 0.01490577
##       n_gamma50      h025    pve025    rho025    pge025       pi025
## 11495         7 0.6236936 0.8263025 0.9864737 0.9940242 0.004153668
## 11496         7 0.7816838 0.8316794 0.9899553 0.9914287 0.011385030
## 11497        10 0.4994558 0.8430376 0.9405188 0.9871328 0.008630525
##       n_gamma025      h975    pve975    rho975    pge975       pi975
## 11495          5 0.9226421 0.8542179 0.9998271 0.9999435 0.006703949
## 11496          6 0.9457489 0.8585409 0.9998575 0.9997468 0.013859300
## 11497          8 0.8340207 0.8713745 0.9998262 0.9998754 0.024743210
##       n_gamma975 position pge025>0.01
## 11495          9    11495        TRUE
## 11496          9    11496        TRUE
## 11497         15    11497        TRUE
ggplot(data,aes(x=position,y=pve50,ymin=pve025,ymax=pve975,col=`pge025>0.01`)) + geom_pointrange(col='gray')+geom_point()

ggplot(data,aes(x=position,y=pge50,ymin=pge025,ymax=pge975,col=`pge025>0.01`)) + geom_pointrange(col='gray')+geom_point()

ggplot(data,aes(x=position,y=n_gamma50,ymin=n_gamma025,ymax=n_gamma975,col=`pge025>0.01`)) + geom_pointrange(col='gray')+geom_point()

arrange by median PGE

data <- hun %>% arrange(pge50) %>% mutate(position=1:length(pge50),`pge025>0.01`=pge025>0.01) 
head(data,n=3L)
##      gene        h50       pve50     rho50        pge50        pi50
## 1   COX7C 0.01692984 0.003432102 0.8281494 0.0001469885 0.001273520
## 2   TOP2A 0.01844599 0.003377690 0.8615399 0.0091151460 0.001745283
## 3 CLEC18B 0.02054125 0.006086327 0.7558502 0.0126856400 0.001241263
##   n_gamma50         h025       pve025     rho025 pge025        pi025
## 1         1 0.0003328227 0.0001036626 0.07951984      0 0.0009400815
## 2         1 0.0004504753 0.0001032576 0.06883884      0 0.0011945301
## 3         1 0.0006393358 0.0002164674 0.04436956      0 0.0008424312
##   n_gamma025      h975     pve975    rho975    pge975       pi975
## 1          0 0.4096393 0.01789460 0.9981367 0.9269078 0.002704068
## 2          0 0.6704550 0.01887628 0.9993515 0.9440432 0.004614959
## 3          0 0.6201597 0.02877017 0.9980631 0.9162264 0.002540946
##   n_gamma975 position pge025>0.01
## 1          4        1       FALSE
## 2          4        2       FALSE
## 3          4        3       FALSE
tail(data,n=3L)
##           gene       h50     pve50     rho50     pge50        pi50
## 11495 C22orf32 0.7375050 0.7554564 0.9988335 0.9989030 0.002784994
## 11496    FBLN5 0.5201401 0.7271755 0.9959768 0.9989287 0.001934650
## 11497    ERAP1 0.7306787 0.8422865 0.9975138 0.9990240 0.002924236
##       n_gamma50      h025    pve025    rho025    pge025        pi025
## 11495         1 0.4262714 0.7344221 0.9827528 0.9923274 0.0016715230
## 11496         3 0.2449352 0.7027841 0.9504776 0.9899655 0.0009012359
## 11497         5 0.5228048 0.8280668 0.9763151 0.9938152 0.0020799960
##       n_gamma025      h975    pve975    rho975    pge975       pi975
## 11495          1 0.9110360 0.7683610 0.9999084 0.9998308 0.004217367
## 11496          3 0.7132473 0.7510260 0.9997500 0.9998127 0.003870395
## 11497          5 0.8333501 0.8557812 0.9996419 0.9998840 0.003394083
##       n_gamma975 position pge025>0.01
## 11495          3    11495        TRUE
## 11496          5    11496        TRUE
## 11497          7    11497        TRUE
ggplot(data,aes(x=position,y=pve50,ymin=pve025,ymax=pve975,col=`pge025>0.01`)) + geom_pointrange(col='gray')+geom_point()

ggplot(data,aes(x=position,y=pge50,ymin=pge025,ymax=pge975,col=`pge025>0.01`)) + geom_pointrange(col='gray')+geom_point()

ggplot(data,aes(x=position,y=n_gamma50,ymin=n_gamma025,ymax=n_gamma975,col=`pge025>0.01`)) + geom_pointrange(col='gray')+geom_point()

check correlations

ggplot(data,aes(x=pve50,y=pge50,ymin=pge025,ymax=pge975,col=`pge025>0.01`)) + geom_pointrange(col='gray') + geom_point() 

cor.test(hun$pge50,hun$pve50)
## 
##  Pearson's product-moment correlation
## 
## data:  hun$pge50 and hun$pve50
## t = 91.779, df = 11495, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6396265 0.6607279
## sample estimates:
##       cor 
## 0.6503026
ggplot(data,aes(x=log10(n_gamma50),y=pge50,col=`pge025>0.01`)) + geom_point(alpha=0.3)